#!/usr/bin/perl -w

#=======================================================================
# html2db
# File ID: 2aad61e8-f743-11dd-8708-000475e441b9
# Simple HTML to DocBook conversion utility.
#
# Character set: UTF-8
# ©opyleft 2004– Øyvind A. Holm <sunny@sunbase.org>
# License: GNU General Public License version 2 or later, see end of 
# file for legal stuff.
#=======================================================================

use strict;
use Getopt::Long;

$| = 1;

our $Debug = 0;

our %Opt = (

    'debug' => 0,
    'help' => 0,
    'verbose' => 0,
    'version' => 0,

);
my $Warn = "<!-- \@html2db -->";

our $progname = $0;
$progname =~ s/^.*\/(.*?)$/$1/;
our $VERSION = "0.00";

Getopt::Long::Configure("bundling");
GetOptions(

    "debug" => \$Opt{'debug'},
    "help|h" => \$Opt{'help'},
    "verbose|v+" => \$Opt{'verbose'},
    "version" => \$Opt{'version'},

) || die("$progname: Option error. Use -h for help.\n");

$Opt{'debug'} && ($Debug = 1);
$Opt{'help'} && usage(0);
if ($Opt{'version'}) {
    print_version();
    exit(0);
}

my $Orig = join("", <>);
my $Data = $Orig;
my $H1_kludge = "<h1>This is just a placeholder by html2db. It should not be here.</h1>";

if ($Data =~ m#</body\b#s) {
    $Data =~ s#</body\b(.*?)$#$H1_kludge</body$1#s;
} else {
    $Data .= $H1_kludge;
}

$Data =~ s#<!DOCTYPE .*?>#<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4//EN" "http://docbook.org/xml/4.3/docbookx.dtd">#s;
$Data = conv_elem("html", "article", $Data);
$Data = conv_elem("head", "articleinfo", $Data);
$Data = conv_elem("em", "emphasis", $Data);
$Data = conv_elem("samp", "computeroutput", $Data);
$Data = conv_elem("code", "filename", $Data, 1);
$Data = conv_elem("kbd", "command", $Data);
$Data = conv_elem("p", "para", $Data);
$Data = conv_elem("ul", "itemizedlist", $Data);
$Data = conv_elem("ol", "orderedlist", $Data);
$Data = conv_elem("li", "listitem", $Data, 0, 1);
$Data = conv_elem("dl", "variablelist", $Data);
$Data = conv_elem("dt", "term", $Data);
$Data = conv_elem("dd", "listitem", $Data, 0, 1);
$Data = conv_elem("pre", "screen", $Data);
$Data = conv_h(6, $Data);
$Data = conv_h(5, $Data);
$Data = conv_h(4, $Data);
$Data = conv_h(3, $Data);
$Data = conv_h(2, $Data);
$Data = conv_h(1, $Data);

$Data =~ s#<a\s+href="(.*?)">(.+?)</a>#<ulink url="$1">$2</ulink>#gs;

D("\$Data before cleanup \x7B\x7B\x7B\n$Data\n\x7D\x7D\x7D");

$Data =~ s/$H1_kludge//;

print($Data);

sub conv_elem {
    # {{{
    my ($From, $To, $Str, $Check, $InsPara) = @_;
    defined($Check) || ($Check = 0);
    defined($InsPara) || ($InsPara = 0);
    my $chk_str = $Check ? "$Warn" : "";
    my ($Par1, $Par2) =
       (   "",    "");
    if ($InsPara) {
        $Par1 = "<para>";
        $Par2 = "</para>";
    }

    $Str =~
    s{
        <$From\b(.*?)>(.*?)</$From\b(.*?)>
    }
    {
        my ($Attrib, $Txt, $SubAttr) =
           (     $1,   $2,       $3);
        D("Er i regexp");
        $Attrib =~ s/^\s*(.*?)\s*$/$1/s;
        length($Attrib) && ($Attrib = " $Attrib", $chk_str = $Warn);
        "$chk_str<$To$Attrib>$Par1$Txt$Par2</$To>";
    }gsex;
    return($Str);
    # }}}
}

sub conv_h {
    # {{{
    my ($Level, $Str) = @_;
    my $end_head = "";

    for (my $Tmp = $Level; $Tmp >= 1; $Tmp--) {
        $end_head .= $Tmp;
    }
    my $reg_str = <<END;
        <(h$Level)\\b(.*?)>
        (.*?)
        </h$Level\\b(.*?)>
        (.*?)
        <(h[$end_head])\\b(.*?)>
END
    D("reg_str = \x7B\x7B\x7B\n$reg_str\n\x7D\x7D\x7D");
    $Str =~
    s{
        $reg_str
    }
    {
        my ($Elem1, $Attrib, $Header, $SubAttr1, $Txt, $Elem2, $SubAttr2) =
           (     $1,     $2,      $3,        $4,   $5,     $6,        $7);
        D("conv_h(): Er i regexp");
        $Attrib =~ s/^\s*(.*?)\s*$/$1/s;
        length($Attrib) && ($Attrib = " $Warn $Attrib");
        "<sect$Level$Attrib> <title>$Header</title>\n$Txt\n</sect$Level>\n<$Elem2$SubAttr2>";
    }gsex;
    return($Str);
    # }}}
}

sub print_version {
    # Print program version {{{
    print("$progname v$VERSION\n");
    # }}}
} # print_version()

sub usage {
    # Send the help message to stdout {{{
    my $Retval = shift;

    if ($Opt{'verbose'}) {
        print("\n");
        print_version();
    }
    print(<<END);

Usage: $progname [options] [file [files [...]]]

Experimental script for converting XHTML to DocBook.

To get all the headers converted, the files have to be filtered through 
the script several times. And there will probably be some <hX> headers 
which needs manual conversion. The curse of <h?> elements.

Options:

  -h, --help
    Show this help.
  -v, --verbose
    Increase level of verbosity. Can be repeated.
  --version
    Print version information.
  --debug
    Print debugging messages.

END
    exit($Retval);
    # }}}
} # usage()

sub msg {
    # Print a status message to stderr based on verbosity level {{{
    my ($verbose_level, $Txt) = @_;

    if ($Opt{'verbose'} >= $verbose_level) {
        print(STDERR "$progname: $Txt\n");
    }
    # }}}
} # msg()

sub D {
    # Print a debugging message {{{
    $Debug || return;
    my @call_info = caller;
    chomp(my $Txt = shift);
    my $File = $call_info[1];
    $File =~ s#\\#/#g;
    $File =~ s#^.*/(.*?)$#$1#;
    print(STDERR "$File:$call_info[2] $$ $Txt\n");
    return("");
    # }}}
} # D()

__END__

# Plain Old Documentation (POD) {{{

=pod

=head1 NAME



=head1 SYNOPSIS

 [options] [file [files [...]]]

=head1 DESCRIPTION



=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print a brief help summary.

=item B<-v>, B<--verbose>

Increase level of verbosity. Can be repeated.

=item B<--version>

Print version information.

=item B<--debug>

Print debugging messages.

=back

=head1 BUGS



=head1 AUTHOR

Made by Øyvind A. Holm S<E<lt>sunny@sunbase.orgE<gt>>.

=head1 COPYRIGHT

Copyleft © Øyvind A. Holm E<lt>sunny@sunbase.orgE<gt>
This is free software; see the file F<COPYING> for legalese stuff.

=head1 LICENCE

This program is free software: you can redistribute it and/or modify it 
under the terms of the GNU General Public License as published by the 
Free Software Foundation, either version 2 of the License, or (at your 
option) any later version.

This program is distributed in the hope that it will be useful, but 
WITHOUT ANY WARRANTY; without even the implied warranty of 
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along 
with this program.
If not, see L<http://www.gnu.org/licenses/>.

=head1 SEE ALSO

=cut

# }}}

# vim: set fenc=UTF-8 ft=perl fdm=marker ts=4 sw=4 sts=4 et fo+=w :
